** Session setup: close any open log, empty memory, turn off output paging
** (saved as a permanent preference) and fix the random-number seed
cap log close
clear all
set more off, permanently
set seed 1234

///////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// Table O.1: Number of Priorities /////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////

** Load the Phase 2 long-format dataset
use "Work Data/Gender_Phase2_long.dta", clear

*************** Adjust courses and check variables ***************

* Inspect the priority, course and career variables currently in memory
describe prior* curso* career*
* Remove the existing prior_* variables (presumably so the prior_* variables
* brought in by the merges further down do not clash with them - confirm)
drop prior_*
* Recode course 31 as course 62 in the 2nd- and 3rd-choice course variables
foreach choice_var of varlist curso2 curso3 {
    replace `choice_var' = 62 if `choice_var' == 31
}

*************** Priority disciplines - 2nd and 3rd Choice ***************

* Indicators for having a non-missing 2nd / 3rd choice course
generate second_choice = (curso2 != .)
generate third_choice  = (curso3 != .)

* Attach the priority-discipline flags of the 2nd-choice career and prefix
* them with second_ so they can coexist with the 3rd-choice flags
rename curso2 career2
merge m:1 career2 using "Original data/priority_discipline.dta"
renvarlab prior_port- prior_lang, prefix(second_)
* Merge diagnostic among rows with a 2nd-choice career
tabulate _merge if career2 != .
* Discard careers that appear only in the priority-discipline file
drop if _merge == 2
drop career1 career3 _merge

* Priority disciplines changed over time; patch the flags accordingly.
* Philosophy (30): Portuguese and Mathematics were priority disciplines in
* 2000 and 2001; from 2002 on, only Portuguese.
replace second_prior_math = 1 if inlist(year, 2000, 2001) & career2 == 30
* Dentistry (14): Biology and Chemistry were required in 2000 and 2001;
* from 2002 on, only Biology.
replace second_prior_chem = 1 if inlist(year, 2000, 2001) & career2 == 14
* Physical Education (27 and 45): only Biology up to 2002; Biology and
* History from 2003 on.
replace second_prior_hist = 0 if inlist(year, 2000, 2001, 2002) & career2 == 27
replace second_prior_hist = 0 if inlist(year, 2000, 2001, 2002) & career2 == 45
* Arts (25): in 2000, only Portuguese was a priority discipline.
replace second_prior_hist = 0 if year == 2000 & career2 == 25

* Attach the priority-discipline flags of the 3rd-choice career and prefix
* them with third_
rename curso3 career3
merge m:1 career3 using "Original data/priority_discipline.dta"
renvarlab prior_port- prior_lang, prefix(third_)
* Merge diagnostic among rows with a 3rd-choice career. It is run BEFORE the
* using-only rows are dropped, exactly as for the 2nd choice, so that both
* merges report the same set of _merge categories.
tabulate _merge if career3 != .
* Discard careers that appear only in the priority-discipline file
drop if _merge == 2
drop career1 _merge

* Priority disciplines changed over time; patch the flags accordingly.
* Philosophy (30): Portuguese and Mathematics were priority disciplines in
* 2000 and 2001; from 2002 on, only Portuguese.
replace third_prior_math = 1 if inlist(year, 2000, 2001) & career3 == 30
* Dentistry (14): Biology and Chemistry were required in 2000 and 2001;
* from 2002 on, only Biology.
replace third_prior_chem = 1 if inlist(year, 2000, 2001) & career3 == 14
* Physical Education (27 and 45): only Biology up to 2002; Biology and
* History from 2003 on.
replace third_prior_hist = 0 if inlist(year, 2000, 2001, 2002) & career3 == 27
replace third_prior_hist = 0 if inlist(year, 2000, 2001, 2002) & career3 == 45
* Arts (25): in 2000, only Portuguese was a priority discipline.
replace third_prior_hist = 0 if year == 2000 & career3 == 25

******** Generate priority dummies ********

* For the 2nd and 3rd choice in turn, build priority_<choice>: on each row it
* takes the value of the <choice>_prior_<subject> flag that matches the row's
* own subject. It starts at 0 where the choice's career is non-missing and is
* left missing otherwise.
local career_second career2
local career_third  career3
local lbl_second "Priority 2\textsuperscript{nd} Choice"
local lbl_third  "Priority 3\textsuperscript{rd} Choice"

foreach ord in second third {
    generate priority_`ord' = 0 if `career_`ord'' != .
    levelsof subject, local(subject_list)
    foreach subj of local subject_list {
        replace priority_`ord' = `ord'_prior_`subj' if subject == "`subj'"
    }
    * Distribution, cross-tab against the existing priority variable,
    * and missing-value count
    tabulate priority_`ord'
    tabulate priority priority_`ord', row
    label variable priority_`ord' "`lbl_`ord''"
    mdesc priority_`ord'
}

******** Create variables with priority discipline characteristics ********

** Number of priorities associated with each choice, summed within inscri2.
** egen total() counts missing values as 0, so rows whose priority_second /
** priority_third is missing contribute nothing to the sum.
bysort inscri2: egen n_prior_1 = total(priority)
bysort inscri2: egen n_prior_2 = total(priority_second)
bysort inscri2: egen n_prior_3 = total(priority_third)
tab1 n_prior_*

** Number of priorities associated with any choice: a row counts when at least
** one of the three priority dummies equals 1.
egen priority_any = anymatch(priority priority_second priority_third), values(1)
bysort inscri2: egen n_prior_any = total(priority_any)
tabulate n_prior_any

** The counts above are constant within inscri2, while the data are assumed to
** hold several rows per inscri2 (one per subject - confirm). Summarising or
** testing over all rows would count each inscri2 repeatedly and overstate the
** sample size, so keep one tagged row per inscri2, in line with the
** regressions below that use a single subject row.
tempvar one_row
egen byte `one_row' = tag(inscri2)

summarize n_prior_1 n_prior_any if `one_row'

ttest n_prior_any if `one_row', by(female)

*********************************************************************************
****************   Relative performances ****************************************
*********************************************************************************

** ENEM: deviation of norm_enem_w from its mean within each year-by-female cell
bysort year female: egen norm_enem_w_ave_g = mean(norm_enem_w)
generate norm_enem_w_g = norm_enem_w - norm_enem_w_ave_g
bysort year female: summarize norm_enem_w_g
* The cell mean was only an intermediate step
drop norm_enem_w_ave_g

*********************************************************************************
**************** Main sample ****************************************************
*********************************************************************************

* 1) Keep only years before affirmative action took place; year 2000 is
*    dropped as well
drop if aa_year == 1
drop if year == 2000
tabulate year

* 2) Drop Portuguese and Foreign Language (Phase 1 has no Portuguese or
*    Foreign Language exam - for Portuguese, Phase 1 has an essay)
tabulate subject, summarize(norm_p1score)
drop if inlist(subject, "lang", "port")
tabulate subject, summarize(norm_p1score)
* Park priority_Portuguese under a name the wildcards below do not match,
* drop every other *Language* / *Portuguese* variable, then restore its name
rename priority_Portuguese priopor
drop *Language* *Portuguese*
rename priopor priority_Portuguese
 
*********************************************************************************
****************   Regressions **************************************************
*********************************************************************************

*** Number of priorities

* Labels used as row names in the exported table (LaTeX markup)
label variable norm_enem_w_g "ENEM"
label variable female "Female"
label variable n_prior_1 "\# priorities - 1st choice"
label variable n_prior_any "\# priorities - Any choice"

estimates clear

* For each outcome, three specifications on the subject=="biol" rows, with
* standard errors clustered on inscri2:
*   (a) female only, (b) adding ENEM, (c) adding career_choice fixed effects
foreach outcome in n_prior_1 n_prior_any {
    regress `outcome' female if subject=="biol", cluster(inscri2)
    estimates store reg_`outcome'

    regress `outcome' female norm_enem_w_g if subject=="biol", cluster(inscri2)
    estimates store reg_`outcome'_enem

    reghdfe `outcome' female norm_enem_w_g if subject=="biol", cluster(inscri2) absorb(career_choice)
    estimates store reg_`outcome'_career1
}

* Flag which stored models include major fixed effects
estadd local major_fe "No": reg_n_prior_1 reg_n_prior_any reg_n_prior_1_enem reg_n_prior_any_enem
estadd local major_fe "Yes": reg_n_prior_1_career1 reg_n_prior_any_career1

* Tex
* NOTE(review): reg_n_prior_1_career1 is estimated and flagged above but is not
* in the model list below (five columns, pattern 1 0 1 0 0) - confirm intended.
esttab reg_n_prior_1 reg_n_prior_1_enem reg_n_prior_any reg_n_prior_any_enem reg_n_prior_any_career1 ///
    using "Output/Gender_Score_Other_Priorities.tex", ///
    se star(* 0.10 ** 0.05 *** 0.01) booktabs ///
    stats(N major_fe, fmt(%9.0fc) labels("Number of observations" "Major FE" )) ///
    b(%7.3f) se(%7.3f) replace f label nogaps keep(female norm_enem_w_g) ///
    mgroups("First choice" "All choices", pattern(1 0 1 0 0)  prefix(\multicolumn{@span}{c}{) suffix(})span erepeat(\cmidrule(lr){@span})) ///
    collabels(none) nomtitle


